{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 数据运算"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 算数运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1     3\n",
       "S2     9\n",
       "S3    15\n",
       "dtype: int64"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#两列相加\n",
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter08.xlsx\",sheet_name = 0)\n",
    "#添加行索引\n",
    "df.index=[\"S1\",\"S2\",\"S3\"]\n",
    "df[\"C1\"]+df[\"C2\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1   -1\n",
       "S2   -1\n",
       "S3   -1\n",
       "dtype: int64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#两列相减\n",
    "df[\"C1\"]-df[\"C2\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1     2\n",
       "S2    20\n",
       "S3    56\n",
       "dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#两列相乘\n",
    "df[\"C1\"]*df[\"C2\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1    0.500\n",
       "S2    0.800\n",
       "S3    0.875\n",
       "dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#两列相除\n",
    "df[\"C1\"]/df[\"C2\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1    0\n",
       "S2    3\n",
       "S3    6\n",
       "Name: C1, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#任意一列加/减一个常数\n",
    "df[\"C1\"]+1\n",
    "df[\"C1\"]-1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 比较运算符"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1    True\n",
       "S2    True\n",
       "S3    True\n",
       "dtype: bool"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter08.xlsx\",sheet_name = 0)\n",
    "#添加行索引\n",
    "df.index=[\"S1\",\"S2\",\"S3\"]\n",
    "df\n",
    "df[\"C1\"] > df[\"C2\"]\n",
    "df[\"C1\"] < df[\"C2\"]\n",
    "df[\"C1\"] != df[\"C2\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 汇总运算"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**count()非空值计数**  \n",
    "非空值计数就是计算摸一个区域中非空数值的个数  \n",
    "默认是求每一列非空值的个数  \n",
    "修改axis=1可以计算每一行的非空值个数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    3\n",
       "C2    3\n",
       "C3    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter08.xlsx\",sheet_name = 0)\n",
    "#添加行索引\n",
    "df.index=[\"S1\",\"S2\",\"S3\"]\n",
    "#计算每一列的非空个数\n",
    "df.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1    3\n",
       "S2    3\n",
       "S3    3\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#计算每一行的非空值个数\n",
    "df.count(axis =1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**sum()求和**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    12\n",
       "C2    15\n",
       "C3    18\n",
       "dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#默认对每一列求和\n",
    "df.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1     6\n",
       "S2    15\n",
       "S3    24\n",
       "dtype: int64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#添加参数axis对每一行求和\n",
    "df.sum(axis = 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对具体某一列求和\n",
    "df[\"C1\"].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**mean()求均值**  \n",
    "求均值就是对某一区域中的所有值进行算数平均值运算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    4.0\n",
       "C2    5.0\n",
       "C3    6.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#默认对每一列进行均值运算\n",
    "df.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1    2.0\n",
       "S2    5.0\n",
       "S3    8.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对每一行进行均值运算\n",
    "df.mean( axis =1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#指定某一列进行均值运算\n",
    "df[\"C1\"].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**max()求最大值**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    7\n",
       "C2    8\n",
       "C3    9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#默认返回每一列的最大值\n",
    "df.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1    3\n",
       "S2    6\n",
       "S3    9\n",
       "dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#对每一行求最大值\n",
    "df.max( axis =1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 对某一列求最大值\n",
    "df[\"C1\"].max()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**min()求最小值使用方法和max()一致**"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**median()求中位数**  \n",
    "中位数就是将一组含有n个数据的序列X按照从小到大排列，位于中间位置的那个数，使用方法和其他函数一致"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    4.0\n",
       "C2    5.0\n",
       "C3    6.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.median()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**mode()求众数**  \n",
    "众数就是在一组数据中出现次数最多的数，使用方法与其他函数一致"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>C1</th>\n",
       "      <th>C2</th>\n",
       "      <th>C3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   C1  C2  C3\n",
       "0   1   1   3"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter08.xlsx\",sheet_name=1)\n",
    "df.index=[\"S1\",\"S2\",\"S3\"]\n",
    "df.mode()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**var()求方差✩**  \n",
    "方差是用来衡量一组数据离散程度的，使用方法与其他函数一致  \n",
    "**std()求标准差✩**  \n",
    "标准差是方差的平方根，二者都是用来表示数据的离散程度的，使用方法与其他函数一致"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**quantile()求分数位**  \n",
    "分数位是比中数位更加详细的基于位置的指标，有四分之一分数位、四分之二分数位、四分之三分数位，而四分之二分数位就是中数位。\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    4.0\n",
       "C2    5.0\n",
       "C3    6.0\n",
       "Name: 0.25, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_excel(r\"../Data/Chapter08.xlsx\",sheet_name=2)\n",
    "df.index=[\"S1\",\"S2\",\"S3\",\"S4\",\"S5\"]\n",
    "df\n",
    "df.quantile(0.25)#求四分之一分数位"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "C1    10.0\n",
       "C2    11.0\n",
       "C3    12.0\n",
       "Name: 0.75, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.quantile(0.75)#求四分之三分数位"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "S1     1.5\n",
       "S2     4.5\n",
       "S3     7.5\n",
       "S4    10.5\n",
       "S5    13.5\n",
       "Name: 0.25, dtype: float64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.quantile(0.25,axis = 1)#求每一行的四分之一分数位"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 相关性运算符✩\n",
    "相关性长用来衡量两个事之间的相关程度，用corr()函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>C1</th>\n",
       "      <th>C2</th>\n",
       "      <th>C3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>C1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C2</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>C3</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     C1   C2   C3\n",
       "C1  1.0  1.0  1.0\n",
       "C2  1.0  1.0  1.0\n",
       "C3  1.0  1.0  1.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.corr()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "第8章 数据运算",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "320px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
